{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from pyquery import PyQuery as pq\n",
    "import re\n",
    "import csv\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "book_html = \"\"\"\n",
    "<tr class=\"item\">\n",
    "          <td width=\"100\" valign=\"top\">\n",
    "            <a class=\"nbg\" href=\"https://book.douban.com/subject/1007305/\" onclick=\"moreurl(this,{i:'0'})\">\n",
    "              <img src=\"https://img1.doubanio.com/view/subject/s/public/s1070959.jpg\" width=\"90\"/>\n",
    "            </a>\n",
    "          </td>\n",
    "          <td valign=\"top\">\n",
    "            \n",
    "            <div class=\"pl2\">\n",
    "\n",
    "\n",
    "              <a href=\"https://book.douban.com/subject/1007305/\" onclick=\"&quot;moreurl(this,{i:'0'})&quot;\" title=\"红楼梦\">\n",
    "                红楼梦\n",
    "\n",
    "                \n",
    "              </a>\n",
    "\n",
    "\n",
    "\n",
    "                  <img src=\"/pics/read.gif\" alt=\"可试读\" title=\"可试读\"/>\n",
    "\n",
    "              \n",
    "            </div>\n",
    "\n",
    "              <p class=\"pl\">[清] 曹雪芹 著 / 人民文学出版社 / 1996-12 / 59.70元</p>\n",
    "\n",
    "            \n",
    "\n",
    "              \n",
    "              <div class=\"star clearfix\">\n",
    "                  <span class=\"allstar50\"/>\n",
    "                  <span class=\"rating_nums\">9.6</span>\n",
    "\n",
    "                <span class=\"pl\">(\n",
    "                    413885人评价\n",
    "                )</span>\n",
    "              </div>\n",
    "\n",
    "            \n",
    "              <p class=\"quote\" style=\"margin: 10px 0; color: #666\">\n",
    "                  <span class=\"inq\">都云作者痴，谁解其中味？</span>\n",
    "              </p>\n",
    "\n",
    "\n",
    "          </td>\n",
    "        </tr>\n",
    "\"\"\"\n",
    "\n",
    "doc = pq(book_html)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据解析"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **书名**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "红楼梦\n"
     ]
    }
   ],
   "source": [
    "book_name = doc('div.pl2 a').text()\n",
    "print(book_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "--------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **```基本信息```**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[清] 曹雪芹 著 / 人民文学出版社 / 1996-12 / 59.70元\n"
     ]
    }
   ],
   "source": [
    "info = doc('p.pl').text()\n",
    "print(info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['[清] 曹雪芹 著 ', ' 人民文学出版社 ', ' 1996-12 ', ' 59.70元']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "info_list = info.split('/')\n",
    "info_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **价格**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' 59.70元'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "price = info_list[-1]\n",
    "price"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **出版时间**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' 1996-12 '"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pub_time = info_list[-2]\n",
    "pub_time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **出版社**\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' 人民文学出版社 '"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "publisher = info_list[-3]\n",
    "publisher"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **作者**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[清] 曹雪芹 著 '"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "authors = ','.join(info_list[:-3])\n",
    "authors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **评分**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'9.6'"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating_nums = doc('span.rating_nums').text()\n",
    "rating_nums"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **评分人数**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'( 413885人评价 )'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating_count_raw = doc('span.pl').text()\n",
    "rating_count_raw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'413885'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating_count = re.findall('\\d+', rating_count_raw)[0]\n",
    "rating_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- **介绍**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'都云作者痴，谁解其中味？'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quote = doc('p.quote span.inq').text()\n",
    "quote"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 将以上结果保存为一个字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'book_name': '红楼梦', 'info': '[清] 曹雪芹 著 / 人民文学出版社 / 1996-12 / 59.70元', 'price': ' 59.70元', 'pub_time': ' 1996-12 ', 'publisher': ' 人民文学出版社 ', 'authors': '[清] 曹雪芹 著 ', 'rating_nums': '9.6', 'rating_count': '413885', 'quote': '都云作者痴，谁解其中味？'}\n"
     ]
    }
   ],
   "source": [
    "book_info = {\n",
    "    'book_name': book_name,\n",
    "    'price': price,\n",
    "    'pub_time': pub_time,\n",
    "    'publisher': publisher,\n",
    "    'authors': authors,\n",
    "    'rating_nums': rating_nums,\n",
    "    'rating_count': rating_count,\n",
    "    'quote': quote\n",
    "}\n",
    "\n",
    "print(book_info)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 将字典内容保存为本地csv格式文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 打开名为 'book_info.csv' 的文件，如果不存在则创建，并使用读写模式 'a+'，同时指定编码方式为 'utf-8'，并设置 newline 参数为空，以避免额外的空行\n",
    "file = open('book_info.csv', 'a+', encoding='utf-8', newline='')\n",
    "\n",
    "# 从 book_info 字典中获取字段名列表，用于写入CSV文件的标题行\n",
    "fieldnames = book_info.keys()\n",
    "\n",
    "# 创建一个 DictWriter 对象，用于将字典数据写入CSV文件，传入文件对象 file 和字段名列表 fieldnames\n",
    "writer = csv.DictWriter(file, fieldnames=fieldnames)\n",
    "\n",
    "# 写入CSV文件的标题行，即字段名列表\n",
    "writer.writeheader()\n",
    "\n",
    "# 将 book_info 字典写入CSV文件\n",
    "writer.writerow(book_info)\n",
    "\n",
    "# 关闭文件\n",
    "file.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
